# -*- coding: utf-8 -*-
import copy
from zc_core.spiders.base import BaseSpider
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter

from njsc.rules import *


class FullSpider(BaseSpider):
    """Full-catalog spider: walks the sku pool, fetches every sku's detail
    page, then chains a price request, and yields the assembled item.

    Parsing itself is delegated to the module-level ``parse_item_data`` /
    ``parse_price`` functions wildcard-imported from ``njsc.rules`` — note
    that the two callback methods below intentionally share those names.
    """
    name = "full"
    # Item detail page url (json output)
    item_url = "https://www.njsc365.com/api/index.php/item-{}.html?output=json"
    # Item price ajax url (json output)
    price_url = "https://www.njsc365.com/api/index.php/item-ajax_product_price-{}.html?output=json"

    def __init__(self, batchNo=None, *args, **kwargs):
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record up-front so downstream pipelines can attach to it.
        BatchDao().create_batch(self.batch_no)
        # Filter of already-collected sku ids, to avoid duplicate crawling.
        self.done_filter = DoneFilter(self.batch_no)

    def start_requests(self):
        """Yield one detail-page Request per sku still worth crawling.

        Skips skus that have been offline for more than MAX_OFFLINE_TIME
        batches, and skus already collected in this batch (unless
        FORCE_RECOVER is set).
        """
        settings = get_project_settings()
        # Hoist loop-invariant settings lookups out of the (potentially huge) loop.
        max_offline_time = settings.get('MAX_OFFLINE_TIME', 2)
        force_recover = settings.get('FORCE_RECOVER', False)
        sku_list = SkuPoolDao().get_sku_pool_list(
            fields={"_id": 1, "catalog3Id": 1, 'salePrice': 1, "materialCode": 1, 'offlineTime': 1})
        self.logger.info('全量: %s', len(sku_list))
        for sku in sku_list:
            sku_id = sku.get("_id")
            material_code = sku.get("materialCode")
            # Skip skus offline for too long. "or 0" also guards against an
            # explicit None stored in the pool, which would make the ">"
            # comparison raise TypeError on Python 3.
            offline_time = sku.get('offlineTime', 0) or 0
            if offline_time > max_offline_time:
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue
            # Skip already-collected skus unless a re-crawl is forced.
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: %s', sku_id)
                continue

            yield Request(
                url=self.item_url.format(sku_id),
                meta={
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                    'materialCode': material_code,
                    'skuLink': self.item_url.format(sku_id)
                },
                callback=self.parse_item_data,
                errback=self.error_back
            )

    def parse_item_data(self, response):
        """Parse a detail page; on success chain the price request, carrying
        the partially-built item in the request meta."""
        if not response.text:
            return
        meta = response.meta
        sku_id = meta.get("skuId")
        # NOTE: this resolves to the module-level parse_item_data imported
        # from njsc.rules, NOT this method — the names only shadow each other
        # at class-attribute level.
        item = parse_item_data(response)
        if item:
            yield Request(
                url=self.price_url.format(sku_id),
                meta={
                    'reqType': 'price',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                    # Shallow copy so later meta handling cannot alias the item.
                    'item': copy.copy(item),
                },
                callback=self.parse_price,
                errback=self.error_back
            )
        else:
            self.logger.error('下架1: sku=%s', sku_id)

    def parse_price(self, response):
        """Parse the price page and yield the finished item."""
        if not response.text:
            return
        sku_id = response.meta.get("skuId")
        # Module-level parse_price from njsc.rules, not this method.
        item = parse_price(response)
        if item:
            self.logger.info('商品: [%s]', sku_id)
            yield item
        else:
            self.logger.error('下架2: sku=%s', sku_id)


